tar_load(c(clust_curv_site, clust_curv_site_fac_50))
tar_load(c(clust_curv_site_red_fac_1, clust_curv_site_red_fac_50))
tar_load(c(clust_curv_site_tot_fac_1, clust_curv_site_tot_fac_50))
tar_load(c(site_cl_rm, site_cl_rm_red, site_cl_rm_tot))
tar_load(c(
gaussian_inla_no_drivers_adj_re,
gaussian_inla_exo_no_drivers_adj_re)
)
tot <- rbind(
gaussian_inla_no_drivers_adj_re,
gaussian_inla_exo_no_drivers_adj_re
)
tot %>%
ggplot(aes(x = mean)) +
geom_histogram() +
facet_wrap(vars(response), nrow = 2, scales = "free")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
tot %>%
group_by(response) %>%
mutate(mean = mean / sd(mean)) %>%
ggplot(aes(x = mean)) +
geom_histogram() +
facet_wrap(vars(response), nrow = 2, scales = "free")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
pca_clust_red <- compute_rotated_pca(
site_no_drivers_inla_tot[,
clust_var_alter
#clust_var_alter[clust_var_alter != "perc_exo_sp"]
],
naxis = 5
)
axis_l <- list(
c("RC1", "RC2"),
c("RC1", "RC3"),
c("RC1", "RC4"),
c("RC1", "RC5"),
c("RC2", "RC3"),
c("RC2", "RC4"),
c("RC2", "RC5"),
c("RC3", "RC4"),
c("RC3", "RC5"),
c("RC4", "RC5")
)
p_pca_red <- map(axis_l,
~ plot_pca_clust(
.data = pca_clust_red$rotated,
site_cl = site_cl_rm_red,
add_point = FALSE,
add_ellipse = FALSE,
xaxis = .x[1], yaxis = .x[2],
ctb_thld = .2,
replace_var = get_var_replacement(),
size_arrows_segment = 1,
label_size = 2.5,
alpha_point = .2,
lim_x_y = c(-1, 1),
force_pull = 1,
force = 80,
var_scaling_factor = 1
)
)
plot_grid(plotlist = p_pca_red, ncol = 2)
The PCA on the variables displayed in fig 1 shows (Fig. ??) that those variables represent independant facets of biodiversity as the five variables are well represented on 5 axis, each axis explaining the same amountof the variation, i.e. 20%.
pca_clust_tot <- compute_rotated_pca(site_no_drivers_inla_tot, naxis = 5)
p_pca_tot <- map(axis_l,
~ plot_pca_clust(
.data = pca_clust_tot$rotated,
site_cl = site_cl_rm_tot,
add_point = FALSE,
add_ellipse = FALSE,
xaxis = .x[1], yaxis = .x[2],
ctb_thld = .2,
replace_var = get_var_replacement(),
size_arrows_segment = 1,
label_size = 2.5,
alpha_point = .2,
lim_x_y = c(-1, 1),
force_pull = 1,
force = 80,
var_scaling_factor = 1
)
)
plot_grid(plotlist = p_pca_tot, ncol = 2)
Those PCA containing all biodiversity facets shows (Fig. ??) relatively similar results than the one in the main text (i.e. without exotic species variables). We see the same independant partitions: temporal trends of species richness, dissimilarity, percentage of exotic species and abundance, Nestedness and of total abundance are respectively related to axis 1, 2, 3, 4 and 5.
pca_clust <- compute_rotated_pca(site_no_drivers_inla, naxis = 4)
axis_l4 <- axis_l[!map_lgl(axis_l, ~any(str_detect(.x, "RC5")))]
axis_l3 <- axis_l[!map_lgl(axis_l, ~any(str_detect(.x, "RC5|RC4")))]
p_pca <- map(axis_l4,
~ plot_pca_clust(
.data = pca_clust$rotated,
site_cl = site_cl_rm_tot,
add_point = FALSE,
add_ellipse = FALSE,
xaxis = .x[1], yaxis = .x[2],
ctb_thld = .2,
replace_var = get_var_replacement(),
size_arrows_segment = 1,
label_size = 2.5,
alpha_point = .2,
lim_x_y = c(-1, 1),
force_pull = 1,
force = 80,
var_scaling_factor = 1
)
)
plot_grid(plotlist = p_pca, ncol = 2)
Comparing the PCA without (main text and here, Fig. ??) and the PCA containing the exotic species variables (Fig. ??), we see that they contain the same independant axis, except obviously the one related of exoctic species.
In the PCA figure of the main text, I then suggest that we keep all the variables, i.e. including the percentage of exotic species and the percentage of exotic species abundance. Concerning the clusters, the story is a bit different.
plot_grid(plotlist = p_pca_tot[c(1, 5, 6, 7)], ncol = 2
)
The clustering failed with the inclusion of percentage of exotic species and abundance. It resulted in a cluster partition only based on exotic fishes temporal trends. One explanation is that kmeans is known for giving more weight to variable with less variation (ref1). Moreover, it does not make a lot of sense to include variables that mostly have no trends and little variation in a cluster analysis.
target_bp_cl_dist(cl_obj = site_cl_rm_red) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
target_bp_cl_dist(cl_obj = site_cl_rm_tot) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
target_bp_cl_dist(cl_obj = site_cl_rm) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
add_to_boxplot <- site_no_drivers_inla_tot %>%
select(perc_exo_sp, perc_exo_abun) %>%
mutate(across(everything(), ~scale(., center = FALSE)[,1])) %>%
rownames_to_column("siteid") %>%
as_tibble()
target_bp_cl_dist(
cl_obj = left_join(site_cl_rm, add_to_boxplot,
by = "siteid"
)
) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
So, outcome of PCA and cluster tries conclude that we should stay with all biodiversity facets without exotic species.
plot(clust_curv_site)
plot(clust_curv_site_fac_50)
k4_fac_50_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[,1])),
iter.max = 150,
k = 4,
alpha = 0.05,
restr.fact = 50,
warnings = 2
)
site_k4_fac_50_cl <- get_cluster_df(
tclust_obj = k4_fac_50_cl,
site_env = site_env,
assign_threshold = .5,
clean_method = "rm"
)
target_bp_cl_dist(cl_obj = site_k4_fac_50_cl) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
k4_fac_1_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[,1])),
iter.max = 150,
k = 4,
alpha = 0.05,
restr.fact = 1,
warnings = 2
)
plot(k4_fac_1_cl)
site_k4_fac_1_cl <- get_cluster_df(
tclust_obj = k4_fac_1_cl,
site_env = site_env,
assign_threshold = .5,
clean_method = "rm"
)
target_bp_cl_dist(cl_obj = site_k4_fac_1_cl) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
k5_fac_1_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[,1])),
iter.max = 150,
k = 5,
alpha = 0.05,
restr.fact = 1,
warnings = 2
)
plot(k5_fac_1_cl)
site_k5_fac_1_cl <- get_cluster_df(
tclust_obj = k5_fac_1_cl,
site_env = site_env,
assign_threshold = .5,
clean_method = "rm"
)
target_bp_cl_dist(cl_obj = site_k5_fac_1_cl) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
k5_fac_50_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[, 1])),
iter.max = 150,
k = 5,
alpha = 0.05,
restr.fact = 50,
warnings = 2
)
plot(k5_fac_1_cl)
k6_fac_50_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[, 1])),
iter.max = 150,
k = 6,
alpha = 0.05,
restr.fact = 50,
warnings = 2
)
plot(k6_fac_50_cl)
site_k6_fac_50_cl <- get_cluster_df(
tclust_obj = k6_fac_50_cl,
site_env = site_env,
assign_threshold = .5,
clean_method = "rm"
)
target_bp_cl_dist(cl_obj = site_k6_fac_50_cl) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
k6_fac_1_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[,1])),
iter.max = 150,
k = 6,
alpha = 0.05,
restr.fact = 1,
warnings = 2
)
plot(k6_fac_1_cl)
site_k6_fac_1_cl <- get_cluster_df(
tclust_obj = k6_fac_1_cl,
site_env = site_env,
assign_threshold = .5,
clean_method = "rm"
)
target_bp_cl_dist(cl_obj = site_k6_fac_1_cl) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
The added cluster is a super increase in abundance
k7_fac_1_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[,1])),
iter.max = 150,
k = 7,
alpha = 0.05,
restr.fact = 1,
warnings = 2
)
plot(k7_fac_1_cl)
site_k7_fac_1_cl <- get_cluster_df(
tclust_obj = k7_fac_1_cl,
site_env = site_env,
assign_threshold = .5,
clean_method = "rm"
)
target_bp_cl_dist(cl_obj = site_k7_fac_1_cl) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
The added cluster is one with both increase in species richness and turnover
k8_fac_1_cl <- tclust(
x = site_no_drivers_inla_tot[, clust_var] %>%
mutate(across(everything(), ~scale(., center = FALSE)[,1])),
iter.max = 150,
k = 8,
alpha = 0.05,
restr.fact = 1,
warnings = 2
)
plot(k8_fac_1_cl)
site_k8_fac_1_cl <- get_cluster_df(
tclust_obj = k8_fac_1_cl,
site_env = site_env,
assign_threshold = .5,
clean_method = "rm"
)
target_bp_cl_dist(cl_obj = site_k8_fac_1_cl) +
theme(axis.text.x = element_text(angle = 20, vjust = 0))
plot(clust_curv_site)
plot(clust_curv_site_fac_50)
plot(clust_curv_site_tot_fac_50)
plot(clust_curv_site_tot_fac_1)
plot(clust_curv_site_red_fac_1)
plot(clust_curv_site_red_fac_50)
p_pca_cl <- map(axis_l,
~ plot_pca_clust(
.data = pca_clust_tot$rotated,
site_cl = site_k6_fac_1_cl,
add_point = TRUE,
add_ellipse = FALSE,
xaxis = .x[1], yaxis = .x[2],
ctb_thld = .2,
replace_var = get_var_replacement(),
size_arrows_segment = 1,
label_size = 2.5,
alpha_point = .2,
lim_x_y = c(-3.5, 3.5),
force_pull = 1,
force = 80,
var_scaling_factor = 3.5
)
)
plot_grid(plotlist = p_pca_cl, ncol = 2)
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 54 rows containing missing values (geom_point).
#> Removed 54 rows containing missing values (geom_point).
#> Warning: Removed 2 rows containing missing values (geom_point).
plot_grid(plotlist = p_pca_cl[c(1, 5, 6, 7)], ncol = 2
)
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Removed 1 rows containing missing values (geom_point).
p_pca_cl_ell <- map(axis_l,
~ plot_pca_clust(
.data = pca_clust_tot$rotated,
site_cl = site_k6_fac_1_cl,
add_point = TRUE,
add_ellipse = TRUE,
xaxis = .x[1], yaxis = .x[2],
ctb_thld = .2,
replace_var = get_var_replacement(),
size_arrows_segment = 1,
label_size = 2.5,
alpha_point = .2,
lim_x_y = c(-3.5, 3.5),
force_pull = 1,
force = 80,
var_scaling_factor = 3.5
)
)
plot_grid(
plotlist = p_pca_cl_ell[c(1, 5, 6, 7)], ncol = 2
)
#> Warning: Removed 53 rows containing non-finite values (stat_ellipse).
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing non-finite values (stat_ellipse).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing non-finite values (stat_ellipse).
#> Warning: Removed 1 rows containing missing values (geom_point).
Reproducibility receipt
## datetime
Sys.time()
#> [1] "2022-04-27 23:33:04 CDT"
## repository
if(requireNamespace('git2r', quietly = TRUE)) {
git2r::repository()
} else {
c(
system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
system2("git", args = c("remote", "-v"), stdout = TRUE)
)
}
#> [1] "commit 2dec419f9a4959f7315dc81cfe2477bd9c45985b"
#> [2] "Author: Alain Danet <alain.danet@mnhn.fr>"
#> [3] "Date: Tue Apr 26 17:55:26 2022 -0500"
#> [4] ""
#> [5] " Add PCA plot and scaling factor option"
#> [6] ""
#> [7] "M\tR/plot_paper.R"
#> [8] "M\t_targets.R"
#> [9] "origin\tgit@github.com:alaindanet/RivFishTimeBiodiversityFacets.git (fetch)"
#> [10] "origin\tgit@github.com:alaindanet/RivFishTimeBiodiversityFacets.git (push)"
## session info
sessionInfo()
#> R version 4.0.5 (2021-03-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Debian GNU/Linux 11 (bullseye)
#>
#> Matrix products: default
#> BLAS: /home/alain/.Renv/versions/4.0.5/lib/R/lib/libRblas.so
#> LAPACK: /home/alain/.Renv/versions/4.0.5/lib/R/lib/libRlapack.so
#>
#> locale:
#> [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
#> [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
#> [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] parallel stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] tclust_1.4-2 ggeffects_1.0.1
#> [3] report_0.5.0.9000 see_0.6.8.1
#> [5] correlation_0.8.0 modelbased_0.7.1.1
#> [7] effectsize_0.6.0.2 parameters_0.16.0
#> [9] performance_0.8.0 bayestestR_0.11.5
#> [11] datawizard_0.2.3 insight_0.15.0
#> [13] easystats_0.4.3 glmmTMB_1.1.2.9000
#> [15] inlatools_0.0.1.9001 INLA_21.11.22
#> [17] sp_1.4-6 foreach_1.5.1
#> [19] Matrix_1.3-2 future_1.24.0
#> [21] slider_0.2.2 vegan_2.5-7
#> [23] lattice_0.20-41 permute_0.9-5
#> [25] codyn_2.0.5 janitor_2.1.0
#> [27] viridis_0.6.2 viridisLite_0.4.0
#> [29] cowplot_1.1.1 terra_1.5-21
#> [31] rnaturalearthdata_0.1.0 rnaturalearth_0.1.0
#> [33] sf_1.0-6 scales_1.1.1
#> [35] kableExtra_1.3.4.9000 here_1.0.1
#> [37] lubridate_1.8.0 magrittr_2.0.2
#> [39] forcats_0.5.1 stringr_1.4.0
#> [41] dplyr_1.0.8 purrr_0.3.4
#> [43] readr_2.1.1 tidyr_1.2.0
#> [45] tibble_3.1.6 ggplot2_3.3.5
#> [47] tidyverse_1.3.1 tarchetypes_0.3.2
#> [49] rmarkdown_2.11 nvimcom_0.9-124
#> [51] targets_0.8.1 conflicted_1.1.0
#>
#> loaded via a namespace (and not attached):
#> [1] readxl_1.3.1 backports_1.2.1 systemfonts_1.0.0
#> [4] igraph_1.3.0 TMB_1.7.20 splines_4.0.5
#> [7] listenv_0.8.0 TH.data_1.0-10 digest_0.6.27
#> [10] htmltools_0.5.2 fansi_0.5.0 memoise_2.0.0
#> [13] cluster_2.1.1 tzdb_0.2.0 globals_0.14.0
#> [16] modelr_0.1.8 sandwich_3.0-0 svglite_2.0.0
#> [19] prettyunits_1.1.1 colorspace_2.0-0 ggrepel_0.9.1
#> [22] rvest_1.0.2 warp_0.2.0 haven_2.5.0
#> [25] xfun_0.30 callr_3.7.0 crayon_1.4.2
#> [28] jsonlite_1.7.2 lme4_1.1-26 survival_3.2-10
#> [31] zoo_1.8-8 iterators_1.0.13 glue_1.6.2
#> [34] gtable_0.3.0 emmeans_1.7.2 webshot_0.5.2
#> [37] mvtnorm_1.1-1 DBI_1.1.1 Rcpp_1.0.6
#> [40] progress_1.2.2 xtable_1.8-4 tmvnsim_1.0-2
#> [43] units_0.8-0 httr_1.4.2 ellipsis_0.3.2
#> [46] farver_2.0.3 pkgconfig_2.0.3 sass_0.4.0
#> [49] dbplyr_2.1.1 utf8_1.2.2 labeling_0.4.2
#> [52] tidyselect_1.1.1 rlang_1.0.2 munsell_0.5.0
#> [55] cellranger_1.1.0 tools_4.0.5 cachem_1.0.4
#> [58] cli_3.2.0 generics_0.1.0 ade4_1.7-16
#> [61] sjlabelled_1.1.7 broom_0.7.12 evaluate_0.15
#> [64] fastmap_1.1.0 yaml_2.2.1 processx_3.5.2
#> [67] knitr_1.38 fs_1.5.2 nlme_3.1-152
#> [70] xml2_1.3.2 compiler_4.0.5 rstudioapi_0.13
#> [73] e1071_1.7-4 reprex_2.0.1 statmod_1.4.35
#> [76] bslib_0.2.4 stringi_1.7.6 highr_0.9
#> [79] ps_1.6.0 psych_2.0.12 classInt_0.4-3
#> [82] nloptr_1.2.2.2 vctrs_0.3.8 pillar_1.6.4
#> [85] lifecycle_1.0.1 jquerylib_0.1.3 estimability_1.3
#> [88] data.table_1.13.6 R6_2.5.1 bookdown_0.24
#> [91] KernSmooth_2.23-18 gridExtra_2.3 parallelly_1.30.0
#> [94] codetools_0.2-18 boot_1.3-27 MASS_7.3-53.1
#> [97] assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.3
#> [100] mnormt_2.0.2 multcomp_1.4-16 mgcv_1.8-34
#> [103] hms_1.1.1 grid_4.0.5 coda_0.19-4
#> [106] class_7.3-18 minqa_1.2.4 snakecase_0.11.0
#> [109] numDeriv_2016.8-1.1